# Batch-download Jingkelong (jkl.com.cn) financial-report PDFs from the news list
import os.path
from lxml import etree
import requests
import pandas as pd

# Create the output directory. os.makedirs also creates the missing
# parent "./file" (plain os.mkdir raises FileNotFoundError when the
# parent is absent), and exist_ok=True makes the call idempotent,
# replacing the racy exists()-then-mkdir check.
os.makedirs("./file/财务资料", exist_ok=True)

# Desktop-browser User-Agent so the site serves its normal HTML pages.
UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0"
keys = []    # report titles (later used as file names)
values = []  # absolute PDF URLs, parallel to `keys`

# Paginated news list, e.g.
# https://www.jkl.com.cn/newsList.aspx?current=1&TypeId=10009
page = 1
while True:
    url = f"https://www.jkl.com.cn/newsList.aspx?current={page}&TypeId=10009"
    # timeout keeps a stalled server from hanging the script forever
    # (requests has no timeout by default).
    html = requests.get(url=url, headers={'User-Agent': UA}, timeout=30).text
    element = etree.HTML(html)

    pdf_names = element.xpath('//div[@class="newsLis"]//li/a/text()')
    pdf_urls = element.xpath('//div[@class="newsLis"]//li/a/@href')
    # "尾页" = "last page" link; NOTE(review): assumes the link is absent
    # on the final page so the loop terminates — confirm against the site.
    page_end = element.xpath("//div[@class='newsLis']//a[text()='尾页']/@href")

    keys.extend(name.strip() for name in pdf_names)
    values.extend("https://www.jkl.com.cn/" + href for href in pdf_urls)
    if not page_end:
        break
    page += 1

# Map title -> URL. NOTE(review): duplicate titles collapse to the last
# URL seen — confirm titles are unique if every report must be kept.
dict_pdf = dict(zip(keys, values))
for key, value in dict_pdf.items():
    suffix = value.split(".")[-1]  # file extension taken from the URL
    # timeout keeps a stalled download from hanging the script forever.
    pdf_data = requests.get(value, headers={'User-Agent': UA}, timeout=60).content
    # NOTE(review): `key` is used verbatim as a file name — a title
    # containing "/" or other reserved characters would fail; sanitize
    # upstream if that ever occurs.
    pdf_path = "./file/财务资料/" + key + "." + suffix
    with open(pdf_path, "wb") as f:
        f.write(pdf_data)
    print(key, "下载完成")